View Javadoc
1 //============================================================================== 2 // file : XMLHelper.java 3 // project: Java Common Utility 4 // 5 // last change: date: $Date: 2003/09/10 09:22:14 $ 6 // by: $Author: bitiboy $ 7 // revision: $Revision: 1.1 $ 8 //------------------------------------------------------------------------------ 9 // copyright: GNU GPL Software License (see class documentation) 10 //============================================================================== 11 package com.justhis.xml; 12 13 14 /* 15 *$Id: XMLHelper.java,v 1.1 2003/09/10 09:22:14 bitiboy Exp $ 16 * 17 * Copyright 2003 Acai Software All Rights Reserved. 18 * 19 * This file XMLHelper.java is part of the Java Common Utility 20 21 * The Java Common Utility is free software; you can redistribute it and/or modify 22 * it under the terms of the GNU General Public License as published by 23 * the Free Software Foundation; either version 2 of the License, or 24 * (at your option) any later version. 25 26 * Java Common Utility is distributed in the hope that it will be useful, 27 * but WITHOUT ANY WARRANTY; without even the implied warranty of 28 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 29 * GNU General Public License for more details. 30 31 * You should have received a copy of the GNU General Public License 32 * along with the Java Common Utility; if not, write to the Free Software 33 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 34 35 * http://www.justhis.com 36 * CONTACT: email = superaxis@sohu.com webmaster@justhis.com 37 */ 38 import com.justhis.util.XParameters; 39 import com.justhis.util.exception.XMLException; 40 41 import org.apache.xerces.dom.DocumentImpl; 42 import org.apache.xerces.parsers.DOMParser; 43 44 import org.apache.xml.serialize.OutputFormat; 45 import org.apache.xml.serialize.XMLSerializer; 46 47 import org.w3c.dom.Document; 48 import org.w3c.dom.Element; 49 import org.w3c.dom.NodeList; 50 51 import org.xml.sax.InputSource; 52 import org.xml.sax.SAXException; 53 54 import java.io.File; 55 import java.io.FileOutputStream; 56 import java.io.FileWriter; 57 import java.io.IOException; 58 import java.io.PrintStream; 59 import java.io.StringReader; 60 import java.io.StringWriter; 61 62 import java.net.MalformedURLException; 63 import java.net.URL; 64 import java.net.URLConnection; 65 66 import java.util.Date; 67 import java.util.Enumeration; 68 69 import javax.xml.parsers.DocumentBuilder; 70 import javax.xml.parsers.DocumentBuilderFactory; 71 import javax.xml.parsers.ParserConfigurationException; 72 import javax.xml.transform.Transformer; 73 import javax.xml.transform.TransformerConfigurationException; 74 import javax.xml.transform.TransformerException; 75 import javax.xml.transform.TransformerFactory; 76 import javax.xml.transform.dom.DOMResult; 77 import javax.xml.transform.dom.DOMSource; 78 import javax.xml.transform.stream.StreamSource; 79 80 81 /*** 82 * TODO DOCUMENT ME! 83 * 84 * @author <a href="http://blog.ejb.cn">acai</a> 85 * @version $Revision: 1.1 $ 86 */ 87 public class XMLHelper { 88 //~ Methods ---------------------------------------------------------------- 89 90 /*** 91 * TODO DOCUMENT ME! 92 * 93 * @param doc TODO 94 * 95 * @return TODO 96 * 97 * @throws XMLException TODO 98 */ 99 public static String convertXMLToString(Document doc) 100 throws XMLException { 101 try { 102 OutputFormat of = new OutputFormat(doc); 103 of.setIndenting(true); 104 105 StringWriter sw = new StringWriter(); 106 XMLSerializer serializer = new XMLSerializer(sw, of); 107 serializer.serialize(doc); 108 109 return sw.toString(); 110 } catch (IOException ioe) { 111 throw new XMLException("Unable to write to the string", ioe); 112 } 113 } 114 115 /*** 116 * TODO DOCUMENT ME! 117 * 118 * @return TODO 119 */ 120 public static Document createXml() { 121 Document doc = new DocumentImpl(); 122 123 return doc; 124 } 125 126 /*** 127 * TODO DOCUMENT ME! 128 * 129 * @param root TODO 130 * 131 * @return TODO 132 */ 133 public static Document createXml(String root) { 134 Document doc = new DocumentImpl(); 135 doc.appendChild(doc.createElement(root)); 136 137 return doc; 138 } 139 140 /* 141 public static Document tidyHTML(String url) throws XMLException { 142 return tidyHTML(convertStringToURL(url)); 143 } 144 145 public static Document tidyHTML(URL url) throws XMLException { 146 try { 147 URLConnection inConnection = url.openConnection(); 148 if (inConnection.getContentType().startsWith("text/xml") || 149 inConnection.getContentType().startsWith("text/xhtml")) { 150 // All ready an XML source 151 return parseXMLFromURL(url); 152 } else if (inConnection.getContentType().startsWith("text/html")) { 153 // An HTML source 154 InputStream is = inConnection.getInputStream(); 155 156 // Clean the input stream 157 ByteArrayOutputStream out = new ByteArrayOutputStream(); 158 159 int totalBytes = 0; 160 byte[] buffer = new byte[16384]; 161 162 while (true) { 163 int bytesRead = is.read(buffer, 0, buffer.length); 164 if (bytesRead < 0) break; 165 // Remove binary bellow space except tab and newline 166 for (int i=0; i < bytesRead; i++) { 167 byte b = buffer[i]; 168 if (b < 32 && b!= 10 && b != 13 && b != 9) b = 32; 169 buffer[i] = b; 170 } 171 out.write(buffer, 0, bytesRead); 172 totalBytes += bytesRead; 173 } 174 is.close(); 175 out.close(); 176 177 String outContent = out.toString(); 178 InputStream in = new ByteArrayInputStream(out.toByteArray()); 179 180 org.w3c.tidy.TagTable tags = org.w3c.tidy.TagTable.getDefaultTagTable(); 181 tags.defineBlockTag("script"); 182 tags.defineBlockTag("nowrap"); 183 184 Tidy tidy = new Tidy(); 185 186 //tidy.setMakeClean(true); 187 tidy.setShowWarnings(false); 188 tidy.setXmlOut(true); 189 tidy.setXmlPi(false); 190 tidy.setDocType("omit"); 191 //tidy.setQuoteNbsp(true); 192 //tidy.setQuoteAmpersand(true); 193 tidy.setXHTML(false); 194 tidy.setRawOut(true); 195 tidy.setNumEntities(true); 196 tidy.setQuiet(true); 197 tidy.setFixComments(true); 198 tidy.setIndentContent(true); 199 tidy.setCharEncoding(org.w3c.tidy.Configuration.ASCII); 200 201 ByteArrayOutputStream baos = new ByteArrayOutputStream(); 202 //Document resultDoc = tidy.parseDOM(in, null); 203 //if (result == null) System.err.println("Null sucker"); 204 //tidy.pprint(resultDoc, baos); 205 206 org.w3c.tidy.Node tNode = tidy.parse(in, baos); 207 String result = "<?xml version=\"1.0\" encoding=\"ISO-8859-1\" ?>\n" + 208 baos.toString(); 209 // Strip the DOCTYPE and script elements 210 int startIndex = 0; 211 int endIndex = 0; 212 if ((startIndex = result.indexOf("<!DOCTYPE")) >= 0) { 213 endIndex = result.indexOf(">",startIndex); 214 result = result.substring(0,startIndex) + 215 result.substring(endIndex + 1, result.length()); 216 } 217 while ((startIndex = result.indexOf("<script")) >= 0) { 218 endIndex = result.indexOf("</script>"); 219 result = result.substring(0,startIndex) + 220 result.substring(endIndex + 9, result.length()); 221 } 222 223 in.close(); 224 baos.close(); 225 226 return parseXMLFromString(result); 227 228 } else { 229 throw new XMLException("Unable to tidy content type: " + 230 inConnection.getContentType()); 231 } 232 } catch (IOException ioe) { 233 throw new XMLException("Unable to perform input/output", ioe); 234 } 235 } 236 */ 237 public static void main(String[] args) { 238 if (args.length < 3) { 239 printUsage(); 240 System.exit(0); 241 } 242 243 File xml_in = new File(args[0]); 244 File xsl_in = new File(args[1]); 245 File xml_out = new File(args[2]); 246 247 Date[] timestamps = new Date[5]; 248 249 String encoding = null; 250 251 if ((args.length > 3) && !args[3].equals("report")) { 252 encoding = args[3]; 253 } 254 255 try { 256 timestamps[0] = new Date(); 257 258 Document xml = parseXMLFromFile(xml_in); 259 260 if (encoding != null) { 261 xml.getDocumentElement().setAttribute("locale", encoding); 262 } 263 264 timestamps[1] = new Date(); 265 266 Document xsl = parseXMLFromFile(xsl_in); 267 timestamps[2] = new Date(); 268 269 Document result = transformXML(xml, xsl); 270 timestamps[3] = new Date(); 271 outputXMLToFile(result, xml_out.getAbsolutePath()); 272 timestamps[4] = new Date(); 273 } catch (Exception ex) { 274 System.err.println("An Error was encountered: " + ex.getMessage()); 275 System.exit(0); 276 } 277 278 if (((args.length > 3) && args[3].equals("report")) 279 || ((args.length > 4) && args[4].equals("report")) 280 ) { 281 System.out.println("Time to parse XML input: " 282 + getTimeDiff(timestamps[0], timestamps[1]) 283 ); 284 System.out.println("Time to parse XSL input: " 285 + getTimeDiff(timestamps[1], timestamps[2]) 286 ); 287 System.out.println("Time to perform transformation: " 288 + getTimeDiff(timestamps[2], timestamps[3]) 289 ); 290 System.out.println("Time to output XML/HTML: " 291 + getTimeDiff(timestamps[3], timestamps[4]) 292 ); 293 System.out.println("Total elapsed time: " 294 + getTimeDiff(timestamps[0], timestamps[4]) 295 ); 296 } 297 } 298 299 /*** 300 * Copies the children of the mergeFromXML element and places them as 301 * children in the mergeToXML. Of course it recursively gets the children 302 * as well. If childrenOnly is set to false, just insert the mergeFromXML 303 * under the mergeToXML. 304 * 305 * @param mergeToXML TODO 306 * @param mergeFromXML TODO 307 * @param childrenOnly TODO 308 */ 309 public static void mergeXML(Element mergeToXML, Element mergeFromXML, 310 boolean childrenOnly 311 ) { 312 Document toDoc = mergeToXML.getOwnerDocument(); 313 Element copyElem = (Element) (toDoc.importNode(mergeFromXML, true)); 314 315 if (childrenOnly) { 316 NodeList nlist = copyElem.getChildNodes(); 317 318 for (int i = 0; i < nlist.getLength(); i++) { 319 org.w3c.dom.Node n = nlist.item(i); 320 mergeToXML.appendChild(n); 321 } 322 323 return; 324 } else { 325 mergeToXML.appendChild(copyElem); 326 } 327 } 328 329 /*** 330 * TODO DOCUMENT ME! 331 * 332 * @param text TODO 333 * @param fileName TODO 334 * 335 * @throws XMLException TODO 336 */ 337 public static void outputTextToFile(String text, String fileName) 338 throws XMLException { 339 try { 340 File f = new File(fileName); 341 File dir = new File(f.getParent()); 342 dir.mkdirs(); 343 344 FileWriter fw = new FileWriter(f); 345 fw.write(text); 346 fw.flush(); 347 fw.close(); 348 } catch (IOException ioe) { 349 throw new XMLException("Unable to write to the given file", ioe); 350 } 351 } 352 353 /*** 354 * TODO DOCUMENT ME! 355 * 356 * @param doc TODO 357 * @param stream TODO 358 * 359 * @throws XMLException TODO 360 */ 361 public static void outputXML(Document doc, PrintStream stream) 362 throws XMLException { 363 try { 364 OutputFormat of = new OutputFormat(doc); 365 of.setIndenting(true); 366 367 XMLSerializer serializer = new XMLSerializer(stream, of); 368 serializer.serialize(doc); 369 } catch (IOException ioe) { 370 throw new XMLException("Unable to write to the given print stream", 371 ioe 372 ); 373 } 374 } 375 376 /*** 377 * TODO DOCUMENT ME! 378 * 379 * @param doc TODO 380 * @param fileName TODO 381 * 382 * @throws XMLException TODO 383 */ 384 public static void outputXMLToFile(Document doc, String fileName) 385 throws XMLException { 386 try { 387 if (doc == null) { 388 throw new IOException("Output XML document was null"); 389 } 390 391 OutputFormat of = new OutputFormat(doc, "UTF-8", true); 392 File f = new File(fileName); 393 FileOutputStream fos = new FileOutputStream(f); 394 XMLSerializer serializer = new XMLSerializer(fos, of); 395 serializer.serialize(doc); 396 fos.close(); 397 } catch (IOException ioe) { 398 throw new XMLException("Unable to write to the given file", ioe); 399 } 400 } 401 402 /*** 403 * TODO DOCUMENT ME! 404 * 405 * @param f TODO 406 * 407 * @return TODO 408 * 409 * @throws XMLException TODO 410 */ 411 public static Document parseXMLFromFile(File f) throws XMLException { 412 try { 413 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 414 factory.setNamespaceAware(true); 415 416 DocumentBuilder builder = factory.newDocumentBuilder(); 417 418 return builder.parse(f); 419 } catch (IOException ex) { 420 throw new XMLException("Unable to read from source string", ex); 421 } catch (ParserConfigurationException ex) { 422 throw new XMLException("Unable to configure the parser", ex); 423 } catch (SAXException ex) { 424 throw new XMLException("Unable to parse the input", ex); 425 } 426 } 427 428 /*** 429 * TODO DOCUMENT ME! 430 * 431 * @param source TODO 432 * 433 * @return TODO 434 * 435 * @throws XMLException TODO 436 */ 437 public static Document parseXMLFromString(String source) 438 throws XMLException { 439 InputSource is = new InputSource(new StringReader(source)); 440 441 return parseXMLFromInputSource(is); 442 } 443 444 /*** 445 * TODO DOCUMENT ME! 446 * 447 * @param url TODO 448 * 449 * @return TODO 450 * 451 * @throws XMLException TODO 452 */ 453 public static Document parseXMLFromURL(URL url) throws XMLException { 454 try { 455 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 456 factory.setNamespaceAware(true); 457 458 DocumentBuilder builder = factory.newDocumentBuilder(); 459 URLConnection inConnection = url.openConnection(); 460 461 return builder.parse(inConnection.getInputStream()); 462 } catch (IOException ex) { 463 throw new XMLException("Unable to read from source string", ex); 464 } catch (ParserConfigurationException ex) { 465 throw new XMLException("Unable to configure the parser", ex); 466 } catch (SAXException ex) { 467 throw new XMLException("Unable to parse the input", ex); 468 } 469 } 470 471 /*** 472 * TODO DOCUMENT ME! 473 * 474 * @param url TODO 475 * 476 * @return TODO 477 * 478 * @throws XMLException TODO 479 */ 480 public static Document parseXMLFromURLString(String url) 481 throws XMLException { 482 return parseXMLFromURL(convertStringToURL(url)); 483 } 484 485 /*** 486 * TODO DOCUMENT ME! 487 * 488 * @param xmlDoc TODO 489 * @param xslDoc TODO 490 * 491 * @return TODO 492 * 493 * @throws XMLException TODO 494 */ 495 public static Document transformXML(Document xmlDoc, Document xslDoc) 496 throws XMLException { 497 try { 498 TransformerFactory factory = TransformerFactory.newInstance(); 499 Transformer transformer = factory.newTransformer(new DOMSource(xslDoc, 500 "./" 501 ) 502 ); 503 DOMResult result = new DOMResult(); 504 transformer.transform(new DOMSource(xmlDoc), result); 505 506 org.w3c.dom.Node resultNode = result.getNode(); 507 508 if (resultNode instanceof Document) { 509 return (Document) resultNode; 510 } else { 511 return result.getNode().getOwnerDocument(); 512 } 513 } catch (TransformerConfigurationException ex) { 514 ex.printStackTrace(); 515 throw new XMLException("Unable to perform transform " 516 + ex.getLocationAsString(), ex 517 ); 518 } catch (TransformerException ex) { 519 throw new XMLException("Unable to perform transform " 520 + ex.getLocationAsString(), ex 521 ); 522 } 523 } 524 525 /*** 526 * TODO DOCUMENT ME! 527 * 528 * @param xmlDoc TODO 529 * @param xslDoc TODO 530 * @param params TODO 531 * 532 * @return TODO 533 * 534 * @throws XMLException TODO 535 */ 536 public static Document transformXML(Document xmlDoc, Document xslDoc, 537 XParameters params 538 ) throws XMLException { 539 try { 540 TransformerFactory factory = TransformerFactory.newInstance(); 541 Transformer transformer = factory.newTransformer(new DOMSource(xslDoc)); 542 543 for (Enumeration e = params.keys(); e.hasMoreElements();) { 544 String key = e.nextElement().toString(); 545 transformer.setParameter(key, params.get(key)); 546 } 547 548 DOMResult result = new DOMResult(); 549 transformer.transform(new DOMSource(xmlDoc), result); 550 551 org.w3c.dom.Node resultNode = result.getNode(); 552 553 if (resultNode instanceof Document) { 554 return (Document) resultNode; 555 } else { 556 return result.getNode().getOwnerDocument(); 557 } 558 } catch (TransformerConfigurationException ex) { 559 ex.printStackTrace(); 560 throw new XMLException("Unable to perform transform " 561 + ex.getLocationAsString(), ex 562 ); 563 } catch (TransformerException ex) { 564 throw new XMLException("Unable to perform transform " 565 + ex.getLocationAsString(), ex 566 ); 567 } 568 } 569 570 /*** 571 * TODO DOCUMENT ME! 572 * 573 * @param xmlDoc TODO 574 * @param xslFile TODO 575 * 576 * @return TODO 577 * 578 * @throws XMLException TODO 579 */ 580 public static Document transformXML(Document xmlDoc, File xslFile) 581 throws XMLException { 582 try { 583 TransformerFactory factory = TransformerFactory.newInstance(); 584 Transformer transformer = factory.newTransformer(new StreamSource(xslFile)); 585 586 DOMResult result = new DOMResult(); 587 transformer.transform(new DOMSource(xmlDoc), result); 588 589 org.w3c.dom.Node resultNode = result.getNode(); 590 591 if (resultNode instanceof Document) { 592 return (Document) resultNode; 593 } else { 594 return result.getNode().getOwnerDocument(); 595 } 596 } catch (TransformerConfigurationException ex) { 597 ex.printStackTrace(); 598 throw new XMLException("Unable to perform transform " 599 + ex.getLocationAsString(), ex 600 ); 601 } catch (TransformerException ex) { 602 throw new XMLException("Unable to perform transform " 603 + ex.getLocationAsString(), ex 604 ); 605 } 606 } 607 608 /*** 609 * TODO DOCUMENT ME! 610 * 611 * @param xmlDoc TODO 612 * @param xslFile TODO 613 * @param params TODO 614 * 615 * @return TODO 616 * 617 * @throws XMLException TODO 618 */ 619 public static Document transformXML(Document xmlDoc, File xslFile, 620 XParameters params 621 ) throws XMLException { 622 try { 623 TransformerFactory factory = TransformerFactory.newInstance(); 624 Transformer transformer = factory.newTransformer(new StreamSource(xslFile)); 625 626 for (Enumeration e = params.keys(); e.hasMoreElements();) { 627 String key = e.nextElement().toString(); 628 transformer.setParameter(key, params.get(key)); 629 } 630 631 DOMResult result = new DOMResult(); 632 transformer.transform(new DOMSource(xmlDoc), result); 633 634 org.w3c.dom.Node resultNode = result.getNode(); 635 636 if (resultNode instanceof Document) { 637 return (Document) resultNode; 638 } else { 639 return result.getNode().getOwnerDocument(); 640 } 641 } catch (TransformerConfigurationException ex) { 642 ex.printStackTrace(); 643 throw new XMLException("Unable to perform transform " 644 + ex.getLocationAsString(), ex 645 ); 646 } catch (TransformerException ex) { 647 throw new XMLException("Unable to perform transform " 648 + ex.getLocationAsString(), ex 649 ); 650 } 651 } 652 653 /*** 654 * TODO DOCUMENT ME! 655 * 656 * @param date1 TODO 657 * @param date2 TODO 658 * 659 * @return TODO 660 */ 661 private static String getTimeDiff(Date date1, Date date2) { 662 long ts1 = date1.getTime(); 663 long ts2 = date2.getTime(); 664 long diff = ts2 - ts1; 665 double d = (double) diff / 1000.0; 666 667 return String.valueOf(d) + " seconds"; 668 } 669 670 /*** 671 * TODO DOCUMENT ME! 672 * 673 * @param url TODO 674 * 675 * @return TODO 676 * 677 * @throws XMLException TODO 678 */ 679 private static URL convertStringToURL(String url) throws XMLException { 680 try { 681 return new URL(url); 682 } catch (MalformedURLException murle) { 683 throw new XMLException(url + " is not a well formed URL", murle); 684 } 685 } 686 687 /*** 688 * TODO DOCUMENT ME! 689 * 690 * @param is TODO 691 * 692 * @return TODO 693 * 694 * @throws XMLException TODO 695 */ 696 private static Document parseXMLFromInputSource(InputSource is) 697 throws XMLException { 698 Document doc = null; 699 700 try { 701 DOMParser parser = new DOMParser(); 702 parser.parse(is); 703 doc = parser.getDocument(); 704 } catch (IOException ioe) { 705 throw new XMLException("Unable to read from source string", ioe); 706 } catch (SAXException saxe) { 707 throw new XMLException("Unable to parse the given string", saxe); 708 } 709 710 return doc; 711 } 712 713 /*** 714 * TODO DOCUMENT ME! 715 */ 716 private static void printUsage() { 717 System.out.println("XMLHelper Usage:"); 718 System.out.println("\t~> java XMLHelper xml_input_file xsl_input_file output_file [i18n encoding] ['report']"); 719 System.out.println("\tEx: ~> java XMLHelper XML/user_interests.xml XSL/user_interests_xsl_only.xsl result.html report"); 720 } 721 } 722 723 724 /* 725 * $Log: XMLHelper.java,v $ 726 * Revision 1.1 2003/09/10 09:22:14 bitiboy 727 * *** empty log message *** 728 * 729 * 730 */

This page was automatically generated by Maven